In [1]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from datetime import timedelta
from matplotlib.ticker import MaxNLocator
%matplotlib inline

In [2]:
# plotting parameters
figure_size = (6.25984252, 4.1456693) # = 15.9, 10.53 cm

def rgb(rgb_255_tuple):
    return tuple(v/255 for v in rgb_255_tuple)

colour_op = rgb((13, 103, 53))
colour_s = rgb((254, 205, 8))
colour_p = rgb((237, 29, 36))

sns.set_palette('muted')

Raw data processing

Inflow profile calculation


In [3]:
df = pd.read_csv('CS1 data 1 min_2017-09-19.pdi.gz')

# also make forward filled
time_interval = pd.Timedelta(minutes=1)
df = df.pivot_table(values='Value', columns='TagName', index=(pd.to_datetime(df['Date'].str.cat(df['Time'], sep=' ')) - time_interval))
df.index.name = 'DateTime'

df['Time_30'] = df.index.floor('30min').time

df['EskomDayOfWeek'] = df.index.dayofweek + 1
# replace holidays with Eskom days
h_path = '..\\holidays.csv'
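# holidays.csv is expected to hold one public holiday per row: column 0 a date, column 1 the
# Eskom day-of-week number to use for that date (e.g. "2017-06-16,6" -- hypothetical row;
# the actual dates and numbers live in the CSV)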
holidays = np.genfromtxt(h_path,  dtype=np.dtype('M'), delimiter=',', usecols=0)
holidays = list(holidays)
holidays_numbers = np.genfromtxt(h_path, dtype=int, delimiter=',', usecols=1)
holidays_numbers = list(holidays_numbers)
for h_n, h_d in zip(holidays_numbers, holidays):
    df.loc[df.index.date==h_d,'EskomDayOfWeek'] = h_n
# drop weekends
df = df[df['EskomDayOfWeek'] <= 5]
df.drop('EskomDayOfWeek', axis=1)  # note: result not assigned, so the column is retained in df

df.head()


Out[3]:
TagName KDCE_S01_31INS01_00IFT#505.FA_PV KDCE_S03_30INS00_00IFT#501.FA_PV KDCE_S03_30INS00_00ILT#501.FA_PV KDCE_S03_30INS00_00ILT#502.FA_PV KDCE_S03_43INS00_00ILT#503.FA_PV KDCE_S03_43INS00_00ILT#504.FA_PV KDCE_S03_43INS00_01ILT#503.FA_PV KDCE_S03_43INS00_01ILT#504.FA_PV KDCE_S03_44PMP01_Machine.FA_RF KDCE_S03_44PMP02_Machine.FA_RF KDCE_S03_44PMP03_Machine.FA_RF KDCE_S03_44PMP04_Machine.FA_RF Time_30 EskomDayOfWeek
DateTime
2017-01-03 00:00:00 190.613326 276.329897 40.915051 7.613364 79.410149 5.080543 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2
2017-01-03 00:01:00 191.185310 276.667415 40.869027 7.525664 79.218411 5.095167 NaN NaN NaN NaN NaN NaN 00:00:00 2
2017-01-03 00:02:00 191.852380 277.123861 40.811069 7.457779 78.861461 5.095167 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2
2017-01-03 00:03:00 192.684271 276.717341 40.735534 7.387303 78.712480 5.082732 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2
2017-01-03 00:04:00 193.303978 277.537946 40.648078 7.292373 78.354963 5.026379 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2

In [4]:
df = df.rename(columns = {'KDCE_S03_30INS00_00ILT#501.FA_PV': '30L D1',
                              'KDCE_S03_30INS00_00ILT#502.FA_PV': '30L D2',
                              'KDCE_S03_43INS00_00ILT#503.FA_PV': '43L D1a',
                              'KDCE_S03_43INS00_01ILT#503.FA_PV': '43L D1b',
                              'KDCE_S03_43INS00_00ILT#504.FA_PV': '43L D2a',
                              'KDCE_S03_43INS00_01ILT#504.FA_PV': '43L D2b',
                              'KDCE_S03_30INS00_00IFT#501.FA_PV': 'Flow from 43L',
                              'KDCE_S01_31INS01_00IFT#505.FA_PV': 'Flow to K1'})

In [5]:
status_col_dict = {}
for c in df.columns:
    if '_Machine.FA_RF' in c: # pump status
        item = c.split('_')[2].split('PMP')
        level = item[0] + 'L'
        pump_num = 'P' + item[1][1]
        status_col_dict[c] = level + ' ' + pump_num
        
df = df.rename(columns = status_col_dict)

In [6]:
df.head()


Out[6]:
TagName Flow to K1 Flow from 43L 30L D1 30L D2 43L D1a 43L D2a 43L D1b 43L D2b 44L P1 44L P2 44L P3 44L P4 Time_30 EskomDayOfWeek
DateTime
2017-01-03 00:00:00 190.613326 276.329897 40.915051 7.613364 79.410149 5.080543 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2
2017-01-03 00:01:00 191.185310 276.667415 40.869027 7.525664 79.218411 5.095167 NaN NaN NaN NaN NaN NaN 00:00:00 2
2017-01-03 00:02:00 191.852380 277.123861 40.811069 7.457779 78.861461 5.095167 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2
2017-01-03 00:03:00 192.684271 276.717341 40.735534 7.387303 78.712480 5.082732 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2
2017-01-03 00:04:00 193.303978 277.537946 40.648078 7.292373 78.354963 5.026379 NaN NaN 1.0 1.0 0.0 0.0 00:00:00 2

Investigate the data


In [7]:
df[['43L D1a','43L D1b']].plot()
# use a


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x2b73a8a7198>

In [8]:
df[['43L D2a','43L D2b']].plot()
# use a


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x2b7457859b0>

In [9]:
df[['43L D1a','43L D2a']].plot()


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x2b745785160>

In [10]:
df[['43L D1a','43L D2a']].sum(axis=1).plot()


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x2b704d37630>

Process selected data


In [11]:
df['44L Total pumps'] = df[[c for c in df.columns if '44L P' in c]].sum(axis=1, skipna=False)
df['44L Level'] = df[['43L D1a', '43L D2a']].sum(axis=1)  # new percentage on 10 ML, but 5 ML
df.drop(['30L D2', '43L D1a', '43L D2a', '43L D1b', '43L D2b', '30L D1', 'Flow to K1'], axis=1, inplace=True)

In [12]:
df.head()


Out[12]:
TagName Flow from 43L 44L P1 44L P2 44L P3 44L P4 Time_30 EskomDayOfWeek 44L Total pumps 44L Level
DateTime
2017-01-03 00:00:00 276.329897 1.0 1.0 0.0 0.0 00:00:00 2 2.0 84.490692
2017-01-03 00:01:00 276.667415 NaN NaN NaN NaN 00:00:00 2 NaN 84.313578
2017-01-03 00:02:00 277.123861 1.0 1.0 0.0 0.0 00:00:00 2 2.0 83.956628
2017-01-03 00:03:00 276.717341 1.0 1.0 0.0 0.0 00:00:00 2 2.0 83.795212
2017-01-03 00:04:00 277.537946 1.0 1.0 0.0 0.0 00:00:00 2 2.0 83.381342

In [13]:
df['44L Total pumps'].hist()


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x2b705e17080>

In [14]:
flow_per_pump_if_1_run = df[df['44L Total pumps'] == 1]['Flow from 43L'].mean()
flow_per_pump_if_2_run = df[df['44L Total pumps'] == 2]['Flow from 43L'].mean()/2
count_per_pump_if_1_run = df[df['44L Total pumps'] == 1]['Flow from 43L'].count()
count_per_pump_if_2_run = df[df['44L Total pumps'] == 2]['Flow from 43L'].count()

print('Average flow per pump if one running = {:.1f} L/s'.format(flow_per_pump_if_1_run))
print('Average flow per pump if two running = {:.1f} L/s'.format(flow_per_pump_if_2_run))
ans = (flow_per_pump_if_1_run*count_per_pump_if_1_run + flow_per_pump_if_2_run*count_per_pump_if_2_run) / (count_per_pump_if_1_run+count_per_pump_if_2_run)
print('Weighted average flow per pump = {:.1f} L/s'.format(ans))
ans = float('{:.1f}'.format(ans))
print(ans)


Average flow per pump if one running = 168.7 L/s
Average flow per pump if two running = 133.0 L/s
Weighted average flow per pump = 143.0 L/s
143.0
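
As a cross-check, the two averages above pin down the share of single-pump minutes implied by the 143.0 L/s weighted figure (illustrative arithmetic only):

share_one_pump = (143.0 - 133.0) / (168.7 - 133.0)   # solve the weighted-mean equation for the weight
print('{:.0%} of the one- or two-pump minutes had a single pump running'.format(share_one_pump))   # ≈ 28%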

In [15]:
def calculate_inflows(level_name, pump_flowrates, capacity, feeding_level_name=None):
    # feeding_level_name is the level feeding this level
    
    number_of_pumps = len(pump_flowrates)

    calc_pump_flows = [np.nan]
    calc_inflows = [np.nan]

    for i in range(1, len(df.index)):
        pump_status = {}
        for j,_ in enumerate(pump_flowrates):
            pump_str = 'P' + str(j+1)
            pump_status[pump_str] = df[level_name + ' ' + pump_str].values[i]
        
        l_new = df[level_name + ' Level'].values[i]
        l_old = df[level_name + ' Level'].values[i-1]
        pump_flow_from_prev_lev = 0 if feeding_level_name is None else df[feeding_level_name + ' PumpFlow'].values[i]
        
        if np.isnan(list(pump_status.values()) + [l_new, l_old, pump_flow_from_prev_lev]).any():
            # any missing input makes this minute's result undefined
            ans_outflow_pumps = np.nan
            ans_inflow = np.nan
        elif (l_new - l_old) != 0:
            ans_outflow_pumps = 0
            for status, flowrate in zip(pump_status.values(), pump_flowrates):
                ans_outflow_pumps += status * flowrate

            # inflow (L/s) = level change converted to litres per second + pumped outflow - flow pumped in from the feeding level
            ans_inflow = ((l_new - l_old) / 100 * capacity) / 60 + ans_outflow_pumps - pump_flow_from_prev_lev
        else:
            ans_outflow_pumps = np.nan
            ans_inflow = np.nan

        calc_pump_flows.append(ans_outflow_pumps)
        calc_inflows.append(ans_inflow)

    df[level_name + ' PumpFlow'] = calc_pump_flows
    df[level_name + ' Inflow'] = calc_inflows

In [16]:
calculate_inflows('44L', [ans]*4, 5000000)  # four pumps at the weighted average flowrate; 5 ML dam capacity in litres

In [17]:
df.head()


Out[17]:
TagName Flow from 43L 44L P1 44L P2 44L P3 44L P4 Time_30 EskomDayOfWeek 44L Total pumps 44L Level 44L PumpFlow 44L Inflow
DateTime
2017-01-03 00:00:00 276.329897 1.0 1.0 0.0 0.0 00:00:00 2 2.0 84.490692 NaN NaN
2017-01-03 00:01:00 276.667415 NaN NaN NaN NaN 00:00:00 2 NaN 84.313578 NaN NaN
2017-01-03 00:02:00 277.123861 1.0 1.0 0.0 0.0 00:00:00 2 2.0 83.956628 286.0 -11.458425
2017-01-03 00:03:00 276.717341 1.0 1.0 0.0 0.0 00:00:00 2 2.0 83.795212 286.0 151.486670
2017-01-03 00:04:00 277.537946 1.0 1.0 0.0 0.0 00:00:00 2 2.0 83.381342 286.0 -58.891881
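
A quick hand check of the mass balance used in calculate_inflows, reproducing the 00:02 row above (two pumps at the 143.0 L/s weighted average, 5 ML capacity, no feeding level so pump_flow_from_prev_lev is 0):

l_old, l_new = 84.313578, 83.956628                           # 44L Level at 00:01 and 00:02 (%)
pump_outflow = 2 * 143.0                                      # two pumps running (L/s)
inflow = (l_new - l_old) / 100 * 5000000 / 60 + pump_outflow  # level change as L/s + pumped outflow
print(inflow)                                                 # ≈ -11.46 L/s, matching the '44L Inflow' value above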

Inflow profile


In [18]:
df2 = df.copy()

date_start = '2017-05-31'
date_end = '2017-06-01'

df2 = df2[(df2.index>=date_start) & (df2.index<date_end)]

inflow_profiles = []
overall_day_completeness_count = 0
overall_day_completeness_max = 0
overall_completeness_count = 0
overall_completeness_max = 0
for l in ['44L']:
    grouped = df2.set_index('Time_30')[l + ' Inflow'].groupby('Time_30')
    
    grouped_mean = grouped.mean()
    inflow_profiles.append(grouped_mean)
    
    print('{} completeness: {:6.2f}% → {:6.2f}%'.format(l, grouped.count().sum()/48/30*100, grouped_mean.count()/48*100))  # 48 half-hour slots × 30 one-minute samples each
    overall_day_completeness_count += grouped.count().sum()
    overall_day_completeness_max += 48*30
    overall_completeness_count += grouped_mean.count()
    overall_completeness_max += 48
print('                   ------   -------')
print('All completeness: {:6.2f}% → {:6.2f}%'.format(overall_day_completeness_count/overall_day_completeness_max*100, overall_completeness_count/overall_completeness_max*100))
    
pd.DataFrame(inflow_profiles).T.to_csv('..\..\simulations\Case_study_1\input\CS1_dam_inflow_profiles.csv.gz', compression='gzip')


44L completeness: 100.00% → 100.00%
                   ------   -------
All completeness: 100.00% → 100.00%

Data for validation of the simulation


In [19]:
df_real = pd.read_csv('CS1 data 1s 05-31_2017-09-27_pivot.csv.gz')
df_real = df_real.set_index('DateTime')
df_real.index = pd.to_datetime(df_real.index)
df_real.head()


Out[19]:
KDCE_S03_30INS00_00IFT#501.FA_PV KDCE_S03_30INS00_00ILT#501.FA_PV KDCE_S03_43INS00_00ILT#503.FA_PV KDCE_S03_43INS00_00ILT#504.FA_PV KDCE_S03_44PMP01_Machine.FA_RF KDCE_S03_44PMP02_Machine.FA_RF KDCE_S03_44PMP03_Machine.FA_RF KDCE_S03_44PMP04_Machine.FA_RF
DateTime
2017-05-31 00:00:00 260.783131 36.701481 4.192639 71.770134 0.0 1.0 0.0 1.0
2017-05-31 00:00:01 260.949493 36.701481 4.192639 71.770134 0.0 1.0 0.0 1.0
2017-05-31 00:00:02 261.012313 36.701481 4.192639 71.681156 0.0 1.0 0.0 1.0
2017-05-31 00:00:03 261.015103 36.701481 4.192639 71.679736 0.0 1.0 0.0 1.0
2017-05-31 00:00:04 260.776504 36.633303 4.192639 71.753052 0.0 1.0 0.0 1.0

In [20]:
df_real = df_real.rename(columns = {'KDCE_S03_30INS00_00ILT#501.FA_PV': '30L D1',
                                    'KDCE_S03_43INS00_00ILT#503.FA_PV': '43L D1a',
                                    'KDCE_S03_43INS00_00ILT#504.FA_PV': '43L D2a',
                                    'KDCE_S03_30INS00_00IFT#501.FA_PV': 'Flow from 43L'})

status_col_dict = {}
for c in df_real.columns:
    if '_Machine.FA_RF' in c: # pump status
        item = c.split('_')[2].split('PMP')
        level = item[0] + 'L'
        pump_num = 'P' + item[1][1]
        status_col_dict[c] = level + ' ' + pump_num
df_real = df_real.rename(columns = status_col_dict)

df_real['44L Status'] = df_real[[c for c in df_real.columns if '44L P' in c]].sum(axis=1, skipna=False)

df_real = df_real.rename(columns = {'30L D1': '30L Level'})
df_real['44L Level'] = df_real[['43L D1a', '43L D2a']].sum(axis=1)  # new percentage on 10 ML

df_real.drop(['43L D1a', '43L D2a', 'Flow from 43L', '30L Level'], axis=1, inplace=True)

df_real.head()


Out[20]:
44L P1 44L P2 44L P3 44L P4 44L Status 44L Level
DateTime
2017-05-31 00:00:00 0.0 1.0 0.0 1.0 2.0 75.962773
2017-05-31 00:00:01 0.0 1.0 0.0 1.0 2.0 75.962773
2017-05-31 00:00:02 0.0 1.0 0.0 1.0 2.0 75.873795
2017-05-31 00:00:03 0.0 1.0 0.0 1.0 2.0 75.872375
2017-05-31 00:00:04 0.0 1.0 0.0 1.0 2.0 75.945691

In [21]:
date_range_start = '2017-05-31'
date_range_end = '2017-06-01'

level_list = ['44L']
level_tag_list = [l + ' Level' for l in level_list]
status_tag_list = [l + ' Status' for l in level_list]
tag_list = level_tag_list + status_tag_list

df_real2 = df_real.loc[date_range_start:date_range_end]

print('Data completeness:', df_real2[tag_list].count(axis=0).sum()/(2 * 60*60*24))  # two tags × 86 400 one-second samples

df_real2[level_tag_list].plot()
df_real2[status_tag_list].plot()


Data completeness: 1.0
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x2b70a77a080>

In [22]:
df_real2['44L Status'] = df_real2['44L Status'].interpolate('pchip')
df_real2['44L Level'] = df_real2['44L Level'].interpolate('pchip')

Save simulation testing data


In [23]:
df_real2[['44L Status', '44L Level']].to_csv('..\..\simulations\Case_study_1\input\CS1_data_for_validation.csv.gz', compression='gzip')

Now perform the simulation in validation mode

If the simulation outputs are reasonably close to the real data, the simulation is considered valid.

Run the simulation in validation_mode, using the recorded initial dam level and following the actual pump statuses instead of the scheduler.


In [24]:
df_sim = pd.read_csv('..\..\simulations\Case_study_1\output\CS1_simulation_data_export_validation.csv.gz')

df_sim['time_clean'] = pd.to_datetime(df_sim['seconds'], unit='s') + timedelta(days=17317)  # 17317 days shifts the Unix epoch to 2017-05-31
df_sim.head()


Out[24]:
seconds 44L Level 44L Status Eskom ToU Pump system total power time_clean
0 0 75.962773 2.0 3 3800.0 2017-05-31 00:00:00
1 1 75.961277 2.0 3 3800.0 2017-05-31 00:00:01
2 2 75.959780 2.0 3 3800.0 2017-05-31 00:00:02
3 3 75.958283 2.0 3 3800.0 2017-05-31 00:00:03
4 4 75.956786 2.0 3 3800.0 2017-05-31 00:00:04

In [25]:
def rmse(real, sim):
    return np.sqrt(((real - sim) ** 2).mean())

def r2(x, y):
    return stats.pearsonr(x,y)[0] ** 2
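
A quick illustration of these two metrics on toy arrays (hypothetical values, not project data):

a = np.array([1.0, 2.0, 3.0, 4.0])
b = np.array([1.1, 1.9, 3.2, 3.8])
print(rmse(a, b))   # ≈ 0.16: average deviation, in the units of the data
print(r2(a, b))     # ≈ 0.98: strong linear agreement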

Comparison of simulation and actual data

44L pump status


In [26]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['44L Status'].values
y2 = df_sim['44L Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [27]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['44L Status'], marker=None, label='Actual data', zorder=10)
ax1.plot(x, df_sim['44L Status'], marker=None, label='Simulation data', zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 4])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2017-05-31 00:00:00','2017-05-31 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2017-05-31 06:00:00','2017-05-31 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2017-05-31 07:00:00','2017-05-31 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2017-05-31 10:00:00','2017-05-31 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 18:00:00','2017-05-31 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2017-05-31 20:00:00','2017-05-31 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 22:00:00','2017-05-31 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()

fig.savefig('output/CS1_validation_44l_status.png', bbox_inches='tight')

plt.close('all')


44L dam level


In [28]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['44L Level'].values
y2 = df_sim['44L Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.996373742526
RMSE =  1.24991693065

In [29]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['44L Level'], marker=None, label='Actual data', zorder=10)
ax1.plot(x, df_sim['44L Level'], marker=None, label='Simulation data', zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2017-05-31 00:00:00','2017-05-31 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2017-05-31 06:00:00','2017-05-31 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2017-05-31 07:00:00','2017-05-31 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2017-05-31 10:00:00','2017-05-31 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 18:00:00','2017-05-31 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2017-05-31 20:00:00','2017-05-31 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2017-05-31 22:00:00','2017-05-31 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()

fig.savefig('output/CS1_validation_44l_level.png', bbox_inches='tight')

plt.close('all')


Processing of x-factored simulation data


In [30]:
def tou_shade(ax, date):
    y_m_d = '{}-{:02}-{:02}'.format(date.year, date.month, date.day)
    ax.axvspan(y_m_d + ' 00:00:00', y_m_d + ' 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
    ax.axvspan(y_m_d + ' 06:00:00', y_m_d + ' 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
    ax.axvspan(y_m_d + ' 07:00:00', y_m_d + ' 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
    ax.axvspan(y_m_d + ' 10:00:00', y_m_d + ' 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 18:00:00', y_m_d + ' 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 20:00:00', y_m_d + ' 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 22:00:00', y_m_d + ' 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

def sim_plot(case_study, factor, mode, level_name, level_limits, x, y, y_lim=100, save_fig=False, chart_type_override=None):
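    # note: the dam level/status branch reads its series from the global df loaded for the
    # current scenario; the power branches plot the y values passed in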
    fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

    if mode != 'power' and mode != 'power_raw':
        for l in level_limits:
            ax1.plot([x[0], x[len(x)-1]], [l, l], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')

        lines1 = ax1.plot(x, df[level_name + ' Level'])

        ax2 = ax1.twinx()
        lines2 = ax2.plot(x, df[level_name + ' Status'], color=sns.color_palette('muted')[2])
        
        tou_shade(ax2, x[0])

        ax1.set_ylabel('Dam level (%)')
        ax1.set_ylim([0, y_lim])
        ax2.set_ylabel('Number of pumps running')
        ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
        
        ax1.set_zorder(ax2.get_zorder()+1)
        ax1.patch.set_visible(False)

        handles, labels = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        del handles[1]
        del labels[1]
        handles += handles2
        labels += labels2
        order = [3, 1, 4, 0, 5, 2]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

        ax1.yaxis.label.set_color(sns.color_palette('muted')[0])
        ax2.yaxis.label.set_color(sns.color_palette('muted')[2])
    
    else:
        lbl = 'Power (resampled, step plot)' if mode == 'power' else 'Power (raw data, 1 second)'
        ax1.plot(x, y, drawstyle='steps-post', label=lbl, zorder=3)
        tou_shade(ax1, x[0])
        ax1.set_ylabel('Power (kW)')
        ax1.set_ylim([0, y_lim])
        
        handles, labels = ax1.get_legend_handles_labels()
        order = [1, 0, 2, 3]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
    
    ax1.set_xlabel('Time of day')
    if mode != 'power' and mode != 'power_raw':
        ax1.yaxis.set_ticks(np.arange(0, 101, 10))
    ax1.xaxis.set_major_locator(mdates.HourLocator())
    ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    fig.autofmt_xdate(rotation=90)
    ax1.set_xlim((min(x), max(x)))

    ax1.grid(which='major', alpha=0.5)
    
    fig.tight_layout()
    
    if save_fig:
        if mode == 'power_raw':
            chart_type = 'power_raw'
        elif mode == 'power':
            chart_type = 'power_resampled'
        else:
            chart_type = 'dam_level_and_status'
                
        filename = 'output/CS{}_{}_{}_{}.png'.format(case_study, level_name, factor, chart_type)
        fig.savefig(filename, bbox_inches='tight')
    
    return fig

1-factor


In [31]:
df = pd.read_csv('..\..\simulations\Case_study_1\output\CS1_simulation_data_export_1-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head()


Out[31]:
seconds 44L Level 44L Status Eskom ToU Pump system total power time_clean
0 0 75.962773 2.0 3 3800.0 1970-01-01 00:00:00
1 1 75.961277 2.0 3 3800.0 1970-01-01 00:00:01
2 2 75.959780 2.0 3 3800.0 1970-01-01 00:00:02
3 3 75.958283 2.0 3 3800.0 1970-01-01 00:00:03
4 4 75.956786 2.0 3 3800.0 1970-01-01 00:00:04

In [32]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


44L Min: 28.629236878109136%  Max: 83.528641126261%

Plot dam level and status


In [33]:
fig = sim_plot(1, 1, 'level', '44L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power

Raw data


In [34]:
fig = sim_plot(1, 1, 'power_raw', '44L', None, df['time_clean'], df['Pump system total power'], y_lim=4000, save_fig=True)


Resampled data


In [35]:
resampled = pd.DataFrame()
resampled['total_30_minute'] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()

resampled.to_csv('output/CS1_resampled_power_1_factor.csv')

fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)
#fig.patch.set_facecolor('grey')

x = resampled.index
y = resampled['total_30_minute']

ax1.plot(x,y, 'x', label='Power (data points resampled to 30 minutes)', zorder=4)
ax1.plot(x,y, '--', label='Power (resampled, line plot)', zorder=2)
ax1.plot(x,y, drawstyle='steps-post', label='Power (resampled, step plot)', zorder=3)
ax1.set_xlabel('Time of day')
ax1.set_ylabel('Power (kW)')
ax1.set_ylim([0, 4000])

ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)

plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()

fig.savefig('output/CS1_1_power_resampled.png', bbox_inches='tight')

plt.close('all')


2-factor


In [36]:
df = pd.read_csv('..\..\simulations\Case_study_1\output\CS1_simulation_data_export_2-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head()


Out[36]:
seconds 44L Level 44L Status Eskom ToU Pump system total power time_clean
0 0 75.962773 2.0 3 3800.0 1970-01-01 00:00:00
1 1 75.961277 2.0 3 3800.0 1970-01-01 00:00:01
2 2 75.959780 2.0 3 3800.0 1970-01-01 00:00:02
3 3 75.958283 2.0 3 3800.0 1970-01-01 00:00:03
4 4 75.956786 2.0 3 3800.0 1970-01-01 00:00:04

In [37]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


44L Min: 28.629236878109136%  Max: 83.528641126261%

Plot dam level and status


In [38]:
fig = sim_plot(1, 2, 'level', '44L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power


In [39]:
resampled = pd.DataFrame()
resampled['total_30_minute'] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS1_resampled_power_2_factor.csv')

fig = sim_plot(1, 2, 'power', '44L', None, resampled.index, resampled['total_30_minute'], y_lim=4000, save_fig=True)


n-factor


In [40]:
df = pd.read_csv('..\..\simulations\Case_study_1\output\CS1_simulation_data_export_n-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head()


Out[40]:
seconds 44L Level 44L Status Eskom ToU Pump system total power time_clean
0 0 75.962773 2.0 3 3800.0 1970-01-01 00:00:00
1 1 75.961277 2.0 3 3800.0 1970-01-01 00:00:01
2 2 75.959780 2.0 3 3800.0 1970-01-01 00:00:02
3 3 75.958283 2.0 3 3800.0 1970-01-01 00:00:03
4 4 75.956786 2.0 3 3800.0 1970-01-01 00:00:04

In [41]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


44L Min: 27.29933687812865%  Max: 80.68008112628145%

Plot dam level and status


In [42]:
fig = sim_plot(1, 'n', 'level', '44L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power


In [43]:
resampled = pd.DataFrame()
resampled['total_30_minute'] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS1_resampled_power_n_factor.csv')

fig = sim_plot(1, 'n', 'power', '44L', None, resampled.index, resampled['total_30_minute'], y_lim=4000, save_fig=True)


Scoring graph


In [44]:
scores = pd.read_csv('scoring_results.csv').set_index('Score')
scores


Out[44]:
1-factor 2-factor n-factor
Score
Feature 16.666667 20.370370 33.333333
Performance 64.134236 64.134236 64.207095

In [45]:
sns.set()

score_sim = (scores['1-factor']['Performance'], scores['2-factor']['Performance'], scores['n-factor']['Performance'])
score_feat = (scores['1-factor']['Feature'], scores['2-factor']['Feature'], scores['n-factor']['Feature'])
ind = np.arange(len(score_sim))    # the x locations for the groups
width = 0.5       # the width of the bars: can also be len(x) sequence

plt.figure(figsize=figure_size)

p1 = plt.bar(ind, score_sim, width)
p2 = plt.bar(ind, score_feat, width, bottom=score_sim)


plt.ylabel('Scores')
plt.xlabel('Control system')  # alternative label: 'x-factored simulation'
plt.xticks(ind, ('1-factor', '2-factor', r'$n$-factor'))
#plt.yticks(np.arange(0, 101, 10))
plt.gca().yaxis.set_major_locator(plt.NullLocator())
#plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), loc='best')
plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

for r1, r2 in zip(p1, p2):
    h1 = r1.get_height()
    h2 = r2.get_height()
    plt.text(r1.get_x() + r1.get_width() / 2., h1 / 2., "%.2f" % h1, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 / 2., "%.2f" % h2, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 + 1.5, "%.2f" % (h1 + h2),
         ha="center", va="center", color='black', weight='bold')


plt.grid(False)
    
plt.tight_layout()
plt.savefig('output/CS1_final_scores.png', bbox_inches='tight', dpi=1200)



In [ ]: